from 查找没用的图片 import listDir,printList
from 筛选建筑宝数据 import readSheet
import xlrd
import pandas as pd
import sys
import os
import numpy
import time
def checkHasField(colums, name, source=None):
    """Return the first column label that contains ``name`` as a substring.

    Args:
        colums: Iterable of column labels. Labels that are not strings are
            skipped instead of raising.
        name: Substring to look for inside each label.
        source: Optional identifier (for example a file path) printed when
            no label matches. When omitted, the module-level ``file``
            variable is used if it exists.

    Returns:
        The first matching label, or ``""`` when no label matches.
    """
    for column in colums:
        # Non-string labels cannot contain ``name`` and have no ``find``.
        if isinstance(column, str) and name in column:
            return column
    if source is None:
        # Backward compatibility: the script's main loop keeps the current
        # path in a module-level ``file``; look it up without raising
        # NameError when this function is called from anywhere else.
        source = globals().get("file")
    if source is not None:
        print(source)
    return ""
if __name__ == "__main__":
    # Merge the first sheet of every workbook under baseDir into one
    # DataFrame with the columns listed in targetFields, then load the
    # workbook at rosterPath for comparison.
    baseDir = '/Users/wangshuguan/Desktop/2019签约比对'
    rosterPath = baseDir + "/（区级）2019洛阳市洛龙所有参保人员花名册 (1).xls"
    # Workbooks that must not take part in the merge.
    excludedFiles = {
        rosterPath,
        baseDir + "/（区级）洛龙区（2019.5.7参保信息）.xlsx",
        baseDir + "/城镇居民/关林中心签约名单/2019年签约汇总表(1).xls",
    }
    # Filtering (rather than list.remove) does not raise when one of the
    # excluded workbooks is absent from the listing.
    files = [
        path
        for path in listDir(baseDir, extensions=["xls", "xlsx"], reuslts=[])
        if path not in excludedFiles
    ]
    print(len(files))
    targetFields = ["签约单位", "姓名", "身份证号", "联系电话"]
    fieldnames = []
    dataFrames = []
    # NOTE: the loop variable must stay named `file`; checkHasField reports
    # the module-level `file` when a column cannot be found.
    for file in files:
        try:
            # sheet_name=0 reads the first sheet directly, so the workbook
            # does not have to be opened separately just to list its sheets.
            # The builtin `str` replaces the removed `numpy.str` alias.
            sheet = pd.read_excel(
                file,
                sheet_name=0,
                index_col=None,
                na_values=['NA'],
                header=1,
                dtype=str,
            )
        except OSError:
            print("无法打开文件", file)
            # Non-zero status so callers can tell the run failed.
            sys.exit(1)
        columns = list(sheet.columns)
        # Maps the position of each target field (as a string key) to the
        # actual column label found in this sheet.
        fields = {}
        for i, x in enumerate(targetFields):
            matched = checkHasField(columns, x)
            if matched:
                fields[f"{i}"] = matched
        fieldnames.append(fields)
        missing = [
            target
            for index, target in enumerate(targetFields)
            if f"{index}" not in fields
        ]
        if missing:
            # Report and skip instead of failing with a bare KeyError.
            print(f"跳过文件，缺少字段 {missing}: {file}")
            continue
        sheet2 = sheet[[fields[f"{index}"] for index in range(len(targetFields))]].copy()
        sheet2.columns = targetFields
        dataFrames.append(sheet2)

    if not dataFrames:
        # pd.concat raises an opaque ValueError on an empty list.
        print("没有可合并的数据")
        sys.exit(1)
#     finalDF = pd.concat(dataFrames,keys=[os.path.basename(x) for x in files])
    finalDF = pd.concat(dataFrames)
#     print(finalDF)

#     print(time.time())
    sheetSource = readSheet(rosterPath, header=0)
#     print(sheetSource[sheetSource.身份证号.isin(sheetSource[sheetSource.身份证号.duplicated()].身份证号)])  # rows whose ID number is duplicated
#     print(finalDF[~finalDF.身份证号.isin(sheetSource.身份证号)])  # rows whose ID number is not in sheetSource
